BRIC Project
# clear workspace
##rm(list=ls())
getting the current WD
getwd()
changing the WD
##setwd("/Users/Manu/Desktop/TUM_Master_Mgt_Technology/TUM_SS_21/Empirical Asset Pricing seminar/Seminar Thesis/BRIC_data")
# loading libraries
library(data.table) # extension of the data.frame package. It is widely used for fast aggregation of large datasets, low latency add/update/remove of columns, quicker ordered joins, and a fast file reader.
library(dplyr) # data manipulation package
library(lubridate)
library(zoo) # methods for totally ordered indexed observations. It aims at performing calculations containing irregular time series of numeric vectors, matrices & factors
library(stats)
library(utils)
library(tidyverse)
Data column descriptions (Worldscope): https://www.professors.wi.tum.de/fileadmin/w00bca/fm/Worldscope_Data_Definition_Guide_Issue_15.pdf
https://docs.google.com/spreadsheets/d/1YtuJiv60Q6nKIaFJLQY60sGQErbsl_8nvPdUHmvO8vM/edit?usp=sharing
memory.limit(9999999999)
[1] 1e+10
# save raw data and adjust others to the required date range
BRIC.monthly.raw <- BRIC.monthly
BRIC.monthly <- subset(BRIC.monthly, Date >= "1994-06-30" & Date <= "2019-01-01")
BRIC.monthly <- na.omit(BRIC.monthly,cols = "RET.USD")
BRIC.yearly.raw <- BRIC.yearly
BRIC.yearly <- subset(BRIC.yearly, YEAR >= "1994" & YEAR <= "2019")
ISIN: International Security Identification Number (stock identifier) ESTAT: active vs inactive company (publicly listed or not) Id: join column with BRIC.yearly dataframe INDM: industry sector code
GEOGN: geographic group name GEOLN: geographic location
List of Database codes: https://www.bwl.uni-mannheim.de/media/Lehrstuehle/bwl/Maug/Database_info/Datastream_dataypes.pdf
ID: ?? Country: 4 BRIC COUNTRY CODES ICBSUC: industrial classification benchmark https://link.springer.com/content/pdf/bbm%3A978-3-8350-9531-1%2F1.pdf
WC07021: SIC(standard industrial classification) primary code from Worldscope W05651: Common Shares Traded - Annual (Security)
TO DO’s:
Project Dates: Thesis submission: June 21 Final presentation: June 07
Strategy: GDP weighted countries; stock level: max sharpe ratio, min volatility, equal sector weights or quotas, momentum?
EDA
# finding the number of NA values by column in the dataframe
BRIC.monthly %>% mutate_all(is.na) %>% summarize_all(sum)
To DO: compute correlations by sector (10 sectors)
# regular correlation matrix of all (four) numeric attributes
cor(select(BRIC.static, where(is.numeric)))
Some conventions:
Characteristic should be calculated as in Hanauer & Lauterbach (2019) or in Hanauer (2020)
Big stocks should be defined as the biggest stocks which together account for 90% of a country’s aggregated market capitalization Benchmark should be defined as the cap-weighted universe of big stocks Returns should be in USD Breakpoints (for Fama-French factors) should be calculated on big stocks (as in the excursus) but both small and big stocks go into the factor calculation.
length(BRIC.static$Id)
length(BRIC.yearly$Id)
# joining the monthly and the static dataframe by the Id column (retainig all rows of BRIC.yearly)
df_combined <- left_join(x=BRIC.monthly,y=BRIC.static,by="Id")
head(df_combined)
summary(df_combined$MV.USD)
## finding the number of NA values by column in the dataframe
BRIC.yearly %>% mutate_all(is.na) %>% summarize_all(sum)
# markt cap (USD): WC07210
## finding the number of NA values by column in the dataframe
df_combined %>% mutate_all(is.na) %>% summarize_all(sum)
Idea: calculate 90% of the sum of all stock market caps per country as a time series (store the result in a column )
# Big stocks should be defined as the biggest stocks which together account for 90% of # a country's aggregated market capitalization
# grouping the mcap values of individual stocks by country
mcap_country <- df_combined %>%
filter(df_combined$MV.USD != "NA") %>%
group_by(country.x, ym)
# computing the sum of the individual stock's market caps by country
country_mcap <- mcap_country %>%
group_by(country.x, ym) %>%
summarize(mc_sum = sum(MV.USD))
country_mcap
summary(country_mcap$mc_sum[country_mcap$country.x == "BRA"])
summary(country_mcap$mc_sum[country_mcap$country.x == "RUS"])
summary(country_mcap$mc_sum[country_mcap$country.x == "IND"])
summary(country_mcap$mc_sum[country_mcap$country.x == "CHN"])
## 1 month t-bill rate
# load data sheet from French's website
library(readr)
FFData <- read_csv("FF_Research_Data_5_Factors_2x3.CSV",
skip = 2)
Missing column names filled in: 'X1' [1]
-- Column specification -------------------------------------------------------------------------------------------------------------------------
cols(
X1 = col_character(),
`Mkt-RF` = col_character(),
SMB = col_character(),
HML = col_character(),
RMW = col_character(),
CMA = col_character(),
RF = col_character()
)
1 parsing failure.
row col expected actual file
694 -- 7 columns 1 columns 'FF_Research_Data_5_Factors_2x3.CSV'
# adding a ym column to risk free rate data
one_m_tbill <- as.data.frame(FFData[c("X1","RF")][1:693,])
one_m_tbill$ym<-as.yearmon(one_m_tbill$X1, "%Y %m")
# merge risk-free rate (1 month treasury bill rate) with monthly data
BRIC.monthly_full <- left_join(x = BRIC.monthly, y = one_m_tbill, by = "ym")
# make rf column numeric
BRIC.monthly_full$RF <- as.numeric(BRIC.monthly_full$RF)
BRIC.monthly_full$RMRF <- BRIC.monthly_full$RET.USD - BRIC.monthly_full$RF
comment: na values appear for size calculation, which differs from what in the excursus note: we omitted the na’s for further calculation –> has to be clarified in the Q&A session with Hanauer
we didn’t calculate the lagged MV yet lines 271,315
## Determine portfolio breakpoints for Size
# Fama-French take the MV from end-of-June and rebalance yearly
BRIC.monthly_full[,month := month(Date)]
BRIC.monthly_full[,year := year(Date)]
BRIC.monthly_full[,hcjun := ifelse(month>=7,year,year-1)]
# determine size portfolio allocation from July on using data that's public from end-of-June on
# we didn't calculate the lagged MV yet
setorder(BRIC.monthly_full,Date,-MV.USD)
hlpvariable <- BRIC.monthly_full[month==7 & !is.na(MV.USD),
.(Size = ifelse((cumsum(MV.USD)/sum(MV.USD))>=0.9,"Small","Big"),Id),
by=year]
# Merge the size portfolio allocation back from July Y to June Y+1
panel_country <- merge(BRIC.monthly_full,hlpvariable,
by.x=c("hcjun","Id"),
by.y=c("year","Id"),
all.x=T)
panel_country_noNA <- na.omit(panel_country,cols = "Size")
# merge yearly BM data with our table
BRIC.yearly_helper <- subset( BRIC.yearly, select = c("Id","country","ICBSUC","YEAR","WC03501","WC07210","WC01001","WC01051","WC01101","WC01251","WC02999") )
BRIC.yearly_helper$BM <- BRIC.yearly_helper$WC03501 / BRIC.yearly_helper$WC07210
# delete NA s from BM column
BRIC.yearly_helper <- na.omit(BRIC.yearly_helper,cols = "BM")
summary(BRIC.yearly_helper$BM)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-119910.70 1.96 6.14 Inf 42.55 Inf
# eliminating unused columns
panel_country_noNA <- subset(panel_country_noNA, select = c("hcjun","Id","country","MV.USD","RET.USD","ym","RF","month","year","Size","Date"))
panel_country_BM <- inner_join(x = panel_country_noNA,y = BRIC.yearly_helper, by = "Id")
# Determine the B/M breakpoints based on big stocks only
hlpvariable2 <- panel_country_BM[month==7 & !is.na(BM) & Size=="Big", .(bm_bb30 = quantile(BM , probs = c(0.3), na.rm=T),
bm_bb70 = quantile(BM , probs = c(0.7), na.rm=T)),by=year]
# Merge the B/M portfolio allocation back from July Y to June Y+1
panel_country_new <- merge(panel_country_BM,hlpvariable2,
by.x=c("hcjun"),
by.y=c("year"),
all.x=T)
panel_country_new[ , pf.bm := ifelse(BM>bm_bb70,"High",ifelse((BM<=bm_bb70 & BM>bm_bb30),"Neutral",ifelse(BM<=bm_bb30,"Low",NA)))]
panel_country_new[, SIZE_VALUE := paste0(Size,".",pf.bm)]
# we used MV.USD instead of the lagged MV
portfolio_returns <- panel_country_new[!is.na(Size) & !is.na(pf.bm)] %>% # this operator nests functions
group_by(Date,SIZE_VALUE) %>% # do "everything" for the groups specified here
summarize(ret.port = weighted.mean(RET.USD,
MV.USD)) %>% # vw returns using lagged mcap
spread(SIZE_VALUE,ret.port) %>% # create one column for each group
mutate(
Small = (Small.High + Small.Neutral + Small.Low)/3, # just exemplary
Big = (Big.High + Big.Neutral + Big.Low)/3,
SMB = Small-Big,
High = (Small.High + Big.High)/2,
Low = (Small.Low + Big.Low)/2,
HML = High-Low
)
`summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
portfolio_returns <- as.data.table(portfolio_returns)
head(portfolio_returns)
NA
profitability: OP/BE: Operating profits-to-book equity. We measure operating profits-to-book equity as in Fama and French (2015) as sales or revenues (WC01001) minus cost of goods sold (WC01051), minus selling, general, and administrative expenses (WC01101), minus interest expense (WC01251); all divided by book equity. (Hanauer, 2019, p. 284)
# use BRIC.yearly_helper
investment: As in Cooper et al. (2008), we measure asset growth in June of year y as the percentage change in total assets (WC02999) from fiscal year ending in calendar year y−2 to fiscal year ending in calendar year y−1.
# use BRIC.yearly_helper